
*** Created on: April , 2022 
*** Updated on: May 14, 2022
*** Sample used here is the same as the one used in the previous disclosure (sample.do)

* Root locations used throughout this file: input data, project directory,
* and the folder that receives the exported result tables.
global datapath  "/projects/data"
global project   "/projects/programs/jpe_revision/Oct_2021"
global tablepath "${project}/out"

* Session logging is currently disabled; uncomment both lines to turn it on.
// cap log close
// log using "$project/log/for_disclosure_april_2022", text replace

********************************************************************************
********************************************************************************
***  Define Programs 
********************************************************************************
********************************************************************************
*** Count number of observations and firms 
*   Usage: countN, sample(flagvar)
*   Restricts (temporarily) to rows with flagvar==1 and prints
*     - the number of rows,
*     - the number of distinct naics-firmid pairs,
*     - the number of distinct firmid values.
*   The data in memory are left unchanged (preserve/restore).
	cap program drop countN
	program define countN
	syntax, sample(string)
	version 13
	* scratch group identifiers; Stata removes them automatically
	tempvar firm_grp firm_ind_grp
	quietly {
		preserve
		keep if `sample'==1
		count
		local n_rows = r(N)
		* group() numbers the distinct values 1..K, so the maximum is the number of groups
		egen `firm_grp' = group(firmid)
		summarize `firm_grp', meanonly
		local n_firms = r(max)
		egen `firm_ind_grp' = group(naics firmid)
		summarize `firm_ind_grp', meanonly
		local n_firm_inds = r(max)
		restore
	}
	disp "`sample' " "#obs: " `n_rows' " #firm_ind_id:" `n_firm_inds' " #firmid:" `n_firms'

	end

*** Winsorize variables
*   Usage: XJ_winsor, varlist(vars) cell(byvars) pl(#) pu(#)
*     varlist()  variables to winsorize (space-separated)
*     cell()     variables defining the groups within which percentiles are computed
*     pl(), pu() lower / upper percentile used as cutoffs
*   For every variable in varlist():
*     - the original values are kept under the name nw_<var>;
*     - <var> is re-created with values below the pl-th percentile raised to it and
*       values above the pu-th percentile lowered to it; missing values stay missing.
*   The cutoffs are held in Stata temporary variables. Earlier versions used
*   temp1/temp2 together with "drop temp*", which permanently deleted every
*   variable in the caller's data whose name started with "temp".
	cap program drop XJ_winsor
	program define XJ_winsor
	syntax, varlist(string) cell(string) pl(integer) pu(integer)
	version 13
	quietly {
	foreach var in `varlist' {
		tempvar lo hi
		* keep the raw values; this errors out if nw_<var> already exists
		rename `var' nw_`var'
		by `cell', sort: egen `lo' = pctile(nw_`var'), p(`pl')
		by `cell', sort: egen `hi' = pctile(nw_`var'), p(`pu')
		gen `var' = nw_`var'
		* missing compares larger than any number, hence the explicit non-missing guard
		replace `var' = `lo' if `var'<`lo' & nw_`var'!=. 
		replace `var' = `hi' if `var'>`hi' & nw_`var'!=.
		drop `lo' `hi'
	}
	}
	end

*** Program For estadd Fixed Effects 
*   Usage: FEestadd, spec(#)   (run right after an estimation command)
*   Attaches two markers to the current estimation results:
*     spec(1): FE_ind_year = "Y", FE_firm_ind = ""
*     spec(2): FE_ind_year = "Y", FE_firm_ind = "Y"
*   Any other value of spec() leaves the results untouched.
	cap program drop FEestadd 
	program define FEestadd
	version 16.0 
	syntax, spec(integer)
	* nothing to do for unrecognized specifications
	if !inlist(`spec', 1, 2) exit
	* the firm-industry marker is switched on for specification 2 only
	local firm_flag = cond(`spec'==2, "Y", "")
	estadd local FE_ind_year = "Y", replace 
	estadd local FE_firm_ind = "`firm_flag'", replace 
	end

********************************************************************************
********************************************************************************
***  I. CONSTRUCT SAMPLES
********************************************************************************
********************************************************************************
* Load the estimation file and report its size.
clear 
use "$datapath/markup_estimation.dta", clear
count 

** Merge in age info. from LBD
* The commented-out block below documents how lbd_for_cmf.dta was built from the raw
* LBD file (selected years only, variables renamed with an _lbd suffix). It is kept
* for reference and is not executed.
/*preserve 
use "/projects/network/raw_data/lbd.dta", clear
keep if yr==1977|yr==1982|yr==1987|yr==1992|yr==1997|yr==2002|yr==2007|yr==2012

rename yr year 
rename emp emp_lbd
rename pay pay_lbd
rename firmid firmid_lbd
keep lbdnum year emp_lbd pay_lbd firmid_lbd firstyear lastyear
save "$datapath/lbd_for_cmf.dta", replace
restore*/

* One-to-one match on lbdnum-year. Rows found only in the LBD extract are dropped;
* rows without an LBD match are kept and tabulated by year for inspection.
merge 1:1 lbdnum year using "$datapath/lbd_for_cmf.dta"
drop if _merge==2
tab year if _merge==1
drop _merge

*** Construct markup 
* l0 is a copy of sw0 and is used as the denominator of the markup and, below,
* as the weight in the firm-level aggregation.
gen l0 = sw0
cap drop mp_*
gen mp_cshare = al*tvs/(l0)							//cost-share w/ total revenue
*** Based on alternative assumptions of returns-to-scale 
* Same formula scaled by 0.95 and 0.9 respectively.
gen mp_cshare2 = al*(0.95)*tvs/(l0)
gen mp_cshare3 = al*(0.9)*tvs/(l0) 

	** Winsorize markup at 5 and 1 percentile 
	* XJ_winsor keeps the raw values under nw_<var> and overwrites <var> with the
	* winsorized values (percentiles computed within year).
	XJ_winsor, varlist(mp_cshare)    cell(year) pl(5) pu(95)		//winsorized at 5 percentile 
	
	* Second pass on the raw copy nw_mp_cshare: after this call nw_mp_cshare holds the
	* 1/99 winsorized values (the raw values move to nw_nw_mp_cshare); it is then
	* renamed to mp_cshare_w1.
	XJ_winsor, varlist(nw_mp_cshare) cell(year) pl(1) pu(99)		//winsorized at 1 percentile 
	rename nw_mp_cshare mp_cshare_w1
	
	XJ_winsor, varlist(mp_cshare2)    cell(year) pl(5) pu(95)
	XJ_winsor, varlist(mp_cshare3)    cell(year) pl(5) pu(95)

*** Aggregate to firm level 
* One row per year-naics-firmid: totals of the level variables, earliest firstyear,
* and means of the markup measures.
* NOTE(review): [aweight=l0] is attached to the whole gcollapse call, so it is not
* limited to the (mean) statistics -- confirm that weighting the (sum) statistics is
* intended (otherwise an unweighted sum would be needed for them).
gcollapse (sum) tvs va1 sw sw_nl l0 (min) firstyear (mean) mp_cshare mp_cshare2 mp_cshare3 mp_cshare_w1 [aweight=l0], by (year naics firmid) 
count 

*** Generate variables 
* lhs_cshare* = 1/markup + log(1 - 1/markup). The log argument is non-positive when
* the markup is <= 1, so these variables are missing in that case.
gen lhs_cshare  = 1/mp_cshare + log(1-1/mp_cshare)
gen lhs_cshare2 = 1/mp_cshare2 + log(1-1/mp_cshare2)
gen lhs_cshare3 = 1/mp_cshare3 + log(1-1/mp_cshare3)
	
* Log markup and log revenue
gen lhs_mp = log(mp_cshare) 
gen lntvs  = log(tvs)
	
* NOTE(review): every threshold in this section is written as #### in this copy of the
* file (presumably masked); numeric values must be filled in before it can run.
* NOTE(review): the lines below that begin with /// are Stata line-join comments: the
* statement on the following line is appended to them, so that following line must
* remain the intended command.

*** SAMPLE 1 (Baseline sample)
gen sample1 = 1
	
*** SAMPLE 1A (mp_cshare >1)
* Flag rows with a non-missing lhs_cshare, then switch the flag off in naics-year
* cells whose number of flagged rows is at or below the threshold.
gen sample1a = 0
	
	replace sample1a = 1 if lhs_cshare!=. 
	/// exclude industry-year cells with less than the minimum # of observations required
	cap drop temp*
	egen temp1 = sum(sample1a), by(naics year)
	replace sample1a = 0 if temp1<=####
	
	
*** SAMPLE 2A (mp_cshare >1 and firms appearing at least the minimum # of times)
* firm_ind_id numbers the distinct naics-firmid pairs.
egen firm_ind_id=group(naics firmid)

	* Step 1: starting from sample1a, alternately drop firm_ind_id groups and naics-year
	* cells whose row counts are at or below the thresholds (five passes). temp1/temp2
	* are recomputed in every pass, but they sum the constant flag sample1a, so they
	* equal the current number of remaining rows in the group. Survivors form sample2a.
	///identify industry-year cells that contain less than the minimum number of firms required after dropping firms that showed up less than the minimum # of times required in the sample 
	preserve
	keep if sample1a==1
	forval i = 1(1)5 {
		cap drop temp*
		egen temp1 = sum(sample1a), by(firm_ind_id)
		drop if temp1<=####
		egen temp2 = sum(sample1a), by(naics year)
		drop if temp2<=####
	}
	gen sample2a = 1 
	keep year firmid naics firm_ind_id sample2a 
	tempfile sample2a
	save `sample2a', replace 
	restore
	
	* Step 2: apply the firm-count filter once and compare with sample2a. tag==1 marks
	* rows that pass the single firm filter but did not survive step 1.
	preserve
	keep if sample1a==1
	cap drop temp*
	egen temp1 = sum(sample1a), by(firm_ind_id)
	drop if temp1<=####
	keep year firmid naics firm_ind_id
	
	merge 1:1 year firm_ind_id using `sample2a'
	gen tag = _merge==1 
	drop _merge 
	
	tempfile temptag 
	save `temptag', replace 
	restore 

* Bring sample2a and tag back to the full data and remove the tagged rows from sample1a.
* NOTE(review): tag stays in memory after this step. Any later merge whose using file
* also carries a variable named tag will keep these values (merge does not overwrite
* master variables by default) unless tag is dropped first.
merge 1:1 year firm_ind_id using `temptag'
drop _merge 
replace sample1a = 0 if tag==1			//update sample1a
gen sample1b = (sample1a==0)			//implicit sample 

* sample2a is missing for rows absent from the step-1 file; recode those to 0.
replace sample2a = 0 if sample2a==.	
gen sample2b = (sample2a==0 & sample1a==1)	//implicit sample

*** SAMPLE 3A (mp_cshare2 >1)
* Rows of sample1a with a non-missing lhs_cshare2, excluding naics-year cells whose
* number of flagged rows is at or below the threshold.
* NOTE(review): thresholds are written as #### in this copy of the file and must be
* replaced by numeric values before running.
gen sample3a = 0
	
	replace sample3a = 1 if lhs_cshare2!=. & sample1a==1
	// exclude industry-year cells with less than the minimum # of observations required
	cap drop temp*
	egen temp1 = sum(sample3a), by(naics year)
	replace sample3a = 0 if temp1<=####
	
	
*** SAMPLE 4A (mp_cshare2 >1 and firms appearing at least the minimum # of times)
	// identify industry-year cells that contain less than the minimum number of firms required after dropping firms that showed up less than the minimum # of times required in the sample
	* Step 1: alternately drop firm_ind_id groups and naics-year cells at or below the
	* thresholds (five passes); the surviving rows form sample4a.
	preserve
	keep if sample3a==1
	forval i = 1(1)5 {
		cap drop temp*
		egen temp1 = sum(sample3a), by(firm_ind_id)
		drop if temp1<=####
		egen temp2 = sum(sample3a), by(naics year)
		drop if temp2<=####
	}
	gen sample4a = 1 
	keep year firmid naics firm_ind_id sample4a 
	tempfile sample4a
	save `sample4a', replace 
	restore
	
	* Step 2: apply the firm-count filter once and compare with sample4a. tag==1 marks
	* rows that pass the single firm filter but did not survive step 1.
	preserve
	keep if sample3a==1
	cap drop temp*
	egen temp1 = sum(sample3a), by(firm_ind_id)
	drop if temp1<=####
	keep year firmid naics firm_ind_id
	
	merge 1:1 year firm_ind_id using `sample4a'
	gen tag = _merge==1 
	drop _merge 
	
	tempfile temptag 
	save `temptag', replace 
	restore 

* A variable named tag may already be in memory from an earlier sample step. merge
* keeps the master's values for same-named variables, so without this drop the stale
* tag would be used below and sample3a would never be updated.
cap drop tag
merge 1:1 year firm_ind_id using `temptag'
drop _merge
replace sample3a = 0 if tag==1			//update sample3a
* tag is scratch; remove it so it cannot leak into later merges
drop tag
gen sample3b = (sample3a==0 & sample1a==1)	//implicit sample between 1a and 3a

* sample4a is missing for rows absent from the step-1 file; recode those to 0.
replace sample4a = 0 if sample4a==.	
gen sample4b = (sample4a==0 & sample3a==1)	//implicit sample between 3a and 4a
gen sample4b2= (sample4a==0 & sample2a==1)	//implicit sample between 2a and 4a

*** SAMPLE 5A (mp_cshare3 >1)
* Rows of sample1a with a non-missing lhs_cshare3, excluding naics-year cells whose
* number of flagged rows is at or below the threshold.
* NOTE(review): thresholds are written as #### in this copy of the file and must be
* replaced by numeric values before running.
gen sample5a = 0
	
	replace sample5a = 1 if lhs_cshare3!=. & sample1a==1
	// exclude industry-year cells with less than the minimum # of observations required
	cap drop temp*
	egen temp1 = sum(sample5a), by(naics year)
	replace sample5a = 0 if temp1<=####
	
	
*** SAMPLE 6A (mp_cshare3 >1 and firms appearing at least the minimum # of times)
	// identify industry-year cells that contain less than the minimum number of firms required after dropping firms that showed up less than the minimum # of times required in the sample
	* Step 1: alternately drop firm_ind_id groups and naics-year cells at or below the
	* thresholds (five passes); the surviving rows form sample6a.
	preserve
	keep if sample5a==1
	forval i = 1(1)5 {
		cap drop temp*
		egen temp1 = sum(sample5a), by(firm_ind_id)
		drop if temp1<=####
		egen temp2 = sum(sample5a), by(naics year)
		drop if temp2<=####
	}
	gen sample6a = 1 
	keep year firmid naics firm_ind_id sample6a 
	tempfile sample6a
	save `sample6a', replace 
	restore
	
	* Step 2: apply the firm-count filter once and compare with sample6a. tag==1 marks
	* rows that pass the single firm filter but did not survive step 1.
	preserve
	keep if sample5a==1
	cap drop temp*
	egen temp1 = sum(sample5a), by(firm_ind_id)
	drop if temp1<=####
	keep year firmid naics firm_ind_id
	
	merge 1:1 year firm_ind_id using `sample6a'
	gen tag = _merge==1 
	drop _merge 
	
	tempfile temptag 
	save `temptag', replace 
	restore 

* A variable named tag may already be in memory from an earlier sample step. merge
* keeps the master's values for same-named variables, so without this drop the stale
* tag would be used below and sample5a would never be updated.
cap drop tag
merge 1:1 year firm_ind_id using `temptag'
drop _merge
replace sample5a = 0 if tag==1			//update sample5a
* tag is scratch; remove it so it cannot leak into later merges
drop tag
gen sample5b = (sample5a==0 & sample3a==1)	//implicit sample between 3a and 5a

* sample6a is missing for rows absent from the step-1 file; recode those to 0.
replace sample6a = 0 if sample6a==.	
gen sample6b = (sample6a==0 & sample5a==1)	//implicit sample between 5a and 6a
gen sample6b2= (sample6a==0 & sample4a==1)	//implicit sample between 4a and 6a

*** SAMPLE 1A_AGE 2A_AGE: With Non-missing Age
* age = year minus firstyear (firstyear is the minimum taken in the firm-level
* collapse). Negative ages are set to missing; the tab shows how many rows lack an
* age in each year.
gen age = year-firstyear
replace age = . if age<0
tab year if age==. 

* Restrict samples 1a and 2a to rows with a usable age.
gen sample1a_age = (sample1a==1 & age!=.)
gen sample2a_age = (sample2a==1 & age!=.)

* Switch sample1a_age off in industry-year cells whose number of flagged rows is at or
* below the threshold (written as #### in this copy of the file).
cap drop temp*
egen temp1 = sum(sample1a_age), by(naics year)
replace sample1a_age = 0 if temp1<=####			//update sample1a_age 

* Switch sample2a_age off for firms (firm_ind_id) whose number of flagged rows with
* non-missing age is at or below the threshold.
* NOTE(review): unlike the 2A construction, this filter is applied once and the
* industry-year cell size is not re-checked afterwards -- confirm this is intended.
egen temp2 = sum(sample2a_age), by(firm_ind_id)
replace sample2a_age = 0 if temp2<=####

gen sample1b_age = (sample1a==1 & sample1a_age==0)	//implicit sample between sample1a and sample1a_age
gen sample2b_age = (sample1a_age==1 & sample2a_age==0)	//implicit sample between sample1a_age and sample2a_age 
gen sample2b_age2= (sample2a==1 & sample2a_age==0)	//implicit sample between sample2a and sample2a_age

*** SAMPLE 1A_CC 2A_CC: By Sectors Based on Concentration
* Sector code: naics with its last three digits removed.
gen naics3 = floor(naics/1000)

* Build a sector-year table with the share of tvs held by the four largest rows, and
* flag a sector as concentrated when that share reaches 0.4 in at least one year.
preserve
	gsort naics3 year -tvs
	by naics3 year: gen rank = _n
	* tvs of the four largest rows only; missing elsewhere, so the sum below skips them
	gen top4_tvs = tvs if rank<=4
	gcollapse (sum) tvs top4_tvs, by(naics3 year)
	gen top4_share = top4_tvs/tvs
	egen max_top4_share = max(top4_share), by(naics3)
	gen concentrated = (max_top4_share>=0.4)              //this 40% concentration flag appears in the associated output
	tempfile tempconc
	save `tempconc', replace
restore

* Attach the sector-year table to the firm-level data.
cap drop _merge
merge m:1 naics3 year using `tempconc'
drop _merge

* Concentrated sectors
gen sample1a_cc = sample1a==1 & concentrated==1
gen sample2a_cc = sample2a==1 & concentrated==1
gen impl_sample2a_cc  = sample1a_cc==1 & sample2a_cc==0		//implicit sample between sample1a_cc and sample2a_cc

* Non-concentrated sectors
gen sample1b_cc = sample1a==1 & concentrated==0			//implicit sample (also used for regression)
gen sample2b_cc = sample2a==1 & concentrated==0			//implicit sample (also used for regression)
gen impl_sample2b_cc = sample1b_cc==1 & sample2b_cc==0		//implicit sample between sample1b_cc and sample2b_cc
	
	
********************************************************************************
********************************************************************************
***  II. ANALYSIS
********************************************************************************
********************************************************************************

*** Baseline Regressions and Robustness Checks
* Every family below is estimated twice and stored as <family>_spec1 / <family>_spec2:
*   spec 1 absorbs industry-by-year effects,
*   spec 2 additionally absorbs firm_ind_id.
local absorb1 "i.naics#i.year"
local absorb2 "i.naics#i.year firm_ind_id"

* family: equation (dependent variable first), sample flag for spec 1, sample flag for spec 2
* Baseline
local eq_bs      "lhs_cshare lntvs"
local s1_bs      "sample1a"
local s2_bs      "sample2a"
* Log-linear Approximation
local eq_ll      "lhs_mp lntvs"
local s1_ll      "sample1a"
local s2_ll      "sample2a"
* DRTS=0.95
local eq_drts95  "lhs_cshare2 lntvs"
local s1_drts95  "sample3a"
local s2_drts95  "sample4a"
* DRTS=0.9
local eq_drts90  "lhs_cshare3 lntvs"
local s1_drts90  "sample5a"
local s2_drts90  "sample6a"
* Add Age Controls
local eq_age     "lhs_cshare lntvs age"
local s1_age     "sample1a_age"
local s2_age     "sample2a_age"
* By Sector Concentration: concentrated
local eq_cc      "lhs_cshare lntvs"
local s1_cc      "sample1a_cc"
local s2_cc      "sample2a_cc"
* By Sector Concentration: not concentrated
local eq_ncc     "lhs_cshare lntvs"
local s1_ncc     "sample1b_cc"
local s2_ncc     "sample2b_cc"

foreach fam in bs ll drts95 drts90 age cc ncc {
	forvalues k = 1/2 {
		reghdfe `eq_`fam'' if `s`k'_`fam''==1, absorb(`absorb`k'')
		FEestadd, spec(`k')
		eststo `fam'_spec`k'
	}
}


*** Make Tables
* One CSV per table under $tablepath; all tables share the same layout options.
* Stored estimates that enter each table:
local models_Baseline             "bs_spec1 bs_spec2"
local models_Robust_Loglinear     "ll_spec1 ll_spec2"
local models_Robust_DRTS          "drts95_spec1 drts95_spec2 drts90_spec1 drts90_spec2"
local models_Robust_AgeControl    "age_spec1 age_spec2"
local models_Robust_Concentration "cc_spec1 cc_spec2 ncc_spec1 ncc_spec2"

foreach tablename in Baseline Robust_Loglinear Robust_DRTS Robust_AgeControl Robust_Concentration {
	esttab  `models_`tablename'' using "$tablepath/`tablename'.csv", ///
		replace b(%9.3fc) se(%9.3fc) compress ///
		order(_cons lntvs) ///
		star(* 0.10 ** 0.05 *** 0.01) ///
		stats(N r2 FE_ind_year FE_firm_ind,fmt(%9.0g %9.3f))
}
	
	
********************************************************************************
********************************************************************************
***  III. DISCLOSURE ANALYSIS
********************************************************************************
********************************************************************************
* Disclosure statistics: for every key variable and every table, set the globals
* (presumably read by discstats.do -- confirm against that file) and run it.
cd "$project/discstats"
	
local varlist = "tvs sw l0"

* Per-table settings: sample dummies and the label passed as argument to discstats.do
* Tab 1: Log-linear Approximation
local dums1 "sample1a sample2a sample2b"
local lab1  "Loglinear"
* Tab 2: DRTS
local dums2 "sample1a sample3a sample4a sample5a sample6a sample3b sample4b sample5b sample6b sample4b2 sample6b2"
local lab2  "DRTS"
* Tab 3: Add Age Controls
local dums3 "sample1a sample1a_age sample2a_age sample1b_age sample2b_age sample2b_age2"
local lab3  "AgeControl"
* Tab 4: By Sector Concentration
local dums4 "sample1a sample1a_cc sample2a_cc sample1b_cc sample2b_cc impl_sample2a_cc impl_sample2b_cc"
local lab4  "BySectorCR4"

foreach var in `varlist' {
	di "`var'"

	forvalues t = 1/4 {
		local tablename = "tab`t'"
		global dummies "`dums`t''"
		global spreadsheet "discstats_april_2022_`tablename'_`var'"
		global key "`var'"
		global firm "firmid"
		* Remove the output of an earlier run, if present. erase is Stata's own
		* file-delete command, so no Unix shell (rm) is required; capture ignores
		* the error raised when the file does not exist.
		capture erase "$spreadsheet.dta"
		do "/projects/disclosure/discstats.do" "`lab`t''"
	}

}	
	
	
	
// log close 
